# DHS Calendar Tutorial - Example 1
# Basic string manipulation
#
# Download the model dataset for the individual women's recode: "ZZIR62FL.DTA".
# The model datasets are available at
# http://dhsprogram.com/data/download-model-datasets.cfm
#
# Lines starting with "#>" reproduce the console output recorded for the
# first six respondents of that dataset. In that output every calendar string
# is 80 characters long: 14 leading blanks followed by 66 calendar characters.

# Install the foreign package to be able to read the data
# install.packages("foreign")
# Load the foreign library
library(foreign)

# Read the Stata dataset into R, without converting factors
dta <- read.dta("C:/Data/DHS_model/ZZIR62FL.dta", convert.factors = FALSE)

# Attach the data frame so its columns (vcal_1, v019) can be referenced by
# bare name in the steps below.
# NOTE(review): attach() is generally discouraged; it is kept here because the
# rest of the script depends on bare column names. Prefer dta$vcal_1 in new code.
attach(dta)


# 1) Display column 1 of the calendar for the first 6 respondents
head(vcal_1)
#> [1] "              00000BPPPPPPPP00000000000000000000000BPPPPPPPP00000000000000000000"
#> [2] "              PPPPPP000000000000000000000000BPPPPPPPP000000000000000000000000000"
#> [3] "              000000000000000000000000000000000000000000000000000000000000000000"
#> [4] "              0000000000BPPPPPPPP00000000000BPPPPPPPP000000000000000000000000000"
#> [5] "              0BPPPPPPPP000000000000000000000000BPPPPPPPP00000000000000000000000"
#> [6] "              000000000000000000000000000000000000000000000000000000000000000BPP"


# 2) Calculate the full length of the calendar by displaying the string lengths
vcal_len <- nchar(vcal_1)
head(vcal_len)
#> [1] 80 80 80 80 80 80


# 3) Take a piece of a string from column 1 (characters 44 to 55, inclusive)
piece <- substr(vcal_1, 44, 55)
head(piece)
#> [1] "00000000BPPP" "0BPPPPPPPP00" "000000000000" "0BPPPPPPPP00" "00000BPPPPPP" "000000000000"


# 4) Find the position of a substring within a string
#    regexpr() returns the position of the first match, or -1 when there is none.
pos <- regexpr("P", vcal_1, fixed = TRUE)
head(pos)
#> [1] 21 15 -1 26 17 79


# 5) Reverse a string

# Reverse the characters of each element of a character vector.
#
#   x: a character vector.
#
# Returns a character vector of the same length as x; a zero-length input
# gives character(0) and missing values stay NA.
strReverse <- function(x) {
  # vapply() always yields a character vector, even for zero-length input,
  # where sapply() would return an empty list.
  reversed <- vapply(
    strsplit(x, NULL),
    function(chars) paste(rev(chars), collapse = ""),
    character(1)
  )
  # paste() turns a missing value into the literal string "NA"; restore it.
  reversed[is.na(x)] <- NA_character_
  reversed
}
rev_cal <- strReverse(vcal_1)
head(rev_cal)
#> [1] "00000000000000000000PPPPPPPPB00000000000000000000000PPPPPPPPB00000              "
#> [2] "000000000000000000000000000PPPPPPPPB000000000000000000000000PPPPPP              "
#> [3] "000000000000000000000000000000000000000000000000000000000000000000              "
#> [4] "000000000000000000000000000PPPPPPPPB00000000000PPPPPPPPB0000000000              "
#> [5] "00000000000000000000000PPPPPPPPB000000000000000000000000PPPPPPPPB0              "
#> [6] "PPB000000000000000000000000000000000000000000000000000000000000000              "


# 6) Trim a string of leading and trailing spaces

# Remove leading and trailing whitespace from each element of x.
#
#   x: a character vector.
#
# Returns a character vector of the same length as x.
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
trim_cal <- trim(vcal_1)
head(trim_cal)
#> [1] "00000BPPPPPPPP00000000000000000000000BPPPPPPPP00000000000000000000"
#> [2] "PPPPPP000000000000000000000000BPPPPPPPP000000000000000000000000000"
#> [3] "000000000000000000000000000000000000000000000000000000000000000000"
#> [4] "0000000000BPPPPPPPP00000000000BPPPPPPPP000000000000000000000000000"
#> [5] "0BPPPPPPPP000000000000000000000000BPPPPPPPP00000000000000000000000"
#> [6] "000000000000000000000000000000000000000000000000000000000000000BPP"


# 7) Display the length of calendar actually used, from the trimmed version
vcal_used <- nchar(trim_cal)
# Should be the same as v019
head(vcal_used)
#> [1] 66 66 66 66 66 66
head(v019)
#> [1] 66 66 66 66 66 66